{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": "### 10 数据连接"
  },
  {
   "cell_type": "code",
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "df_obj1 = pd.DataFrame({'key': ['b', 'b', 'a', 'c', 'a', 'a', 'b'],\n",
    "                        'data1' : np.random.randint(0,10,7)})\n",
    "df_obj2 = pd.DataFrame({'key': ['a', 'b' ,'d'],\n",
    "                        'data2' : np.random.randint(0,10,3)})\n",
    "print(df_obj1)\n",
    "print('-'*50)\n",
    "print(df_obj2)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T11:21:17.828960Z",
     "start_time": "2025-01-17T11:21:17.563228Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  key  data1\n",
      "0   b      2\n",
      "1   b      5\n",
      "2   a      4\n",
      "3   c      7\n",
      "4   a      3\n",
      "5   a      2\n",
      "6   b      3\n",
      "--------------------------------------------------\n",
      "  key  data2\n",
      "0   a      1\n",
      "1   b      5\n",
      "2   d      6\n"
     ]
    }
   ],
   "execution_count": 1
  },
  {
   "cell_type": "code",
   "source": [
    "# 默认连接使用相同的列名，连接方式是内连接\n",
    "pd.merge(df_obj1, df_obj2)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T11:21:43.564018Z",
     "start_time": "2025-01-17T11:21:43.554328Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "  key  data1  data2\n",
       "0   b      2      5\n",
       "1   b      5      5\n",
       "2   a      4      1\n",
       "3   a      3      1\n",
       "4   a      2      1\n",
       "5   b      3      5"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key</th>\n",
       "      <th>data1</th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>b</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>b</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>a</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>a</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>a</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>b</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 2
  },
  {
   "cell_type": "code",
   "source": [
    "# 左df和右df都拿索引连接(仅对相同的索引行)\n",
    "pd.merge(df_obj1, df_obj2,left_index=True,right_index=True)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T11:23:13.037917Z",
     "start_time": "2025-01-17T11:23:13.032971Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "  key_x  data1 key_y  data2\n",
       "0     b      2     a      1\n",
       "1     b      5     b      5\n",
       "2     a      4     d      6"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key_x</th>\n",
       "      <th>data1</th>\n",
       "      <th>key_y</th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>b</td>\n",
       "      <td>2</td>\n",
       "      <td>a</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>b</td>\n",
       "      <td>5</td>\n",
       "      <td>b</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>a</td>\n",
       "      <td>4</td>\n",
       "      <td>d</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 3
  },
  {
   "cell_type": "code",
   "source": [
    "# 左表和右表都拿key列来连接\n",
    "pd.merge(df_obj1, df_obj2, on='key')"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T11:24:07.897653Z",
     "start_time": "2025-01-17T11:24:07.893465Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "  key  data1  data2\n",
       "0   b      2      5\n",
       "1   b      5      5\n",
       "2   a      4      1\n",
       "3   a      3      1\n",
       "4   a      2      1\n",
       "5   b      3      5"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key</th>\n",
       "      <th>data1</th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>b</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>b</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>a</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>a</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>a</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>b</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 4
  },
  {
   "cell_type": "code",
   "source": [
    "# 更改列名\n",
    "df_obj1 = df_obj1.rename(columns={'key':'key1'})\n",
    "df_obj2 = df_obj2.rename(columns={'key':'key2'})"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T11:24:38.227346Z",
     "start_time": "2025-01-17T11:24:38.224455Z"
    }
   },
   "outputs": [],
   "execution_count": 5
  },
  {
   "cell_type": "code",
   "source": [
    "df_obj1"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T11:24:40.964509Z",
     "start_time": "2025-01-17T11:24:40.959746Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "  key1  data1\n",
       "0    b      2\n",
       "1    b      5\n",
       "2    a      4\n",
       "3    c      7\n",
       "4    a      3\n",
       "5    a      2\n",
       "6    b      3"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key1</th>\n",
       "      <th>data1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>b</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>b</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>a</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>c</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>a</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>a</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>b</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 6
  },
  {
   "cell_type": "code",
   "source": [
    "df_obj2"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T11:24:53.102492Z",
     "start_time": "2025-01-17T11:24:53.098321Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "  key2  data2\n",
       "0    a      1\n",
       "1    b      5\n",
       "2    d      6"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key2</th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>a</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>b</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>d</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 7
  },
  {
   "cell_type": "code",
   "source": [
    "# 左表以key1来连接，右表以key2来连接\n",
    "pd.merge(df_obj1, df_obj2, left_on='key1', right_on='key2')"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T11:24:58.366835Z",
     "start_time": "2025-01-17T11:24:58.362015Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "  key1  data1 key2  data2\n",
       "0    b      2    b      5\n",
       "1    b      5    b      5\n",
       "2    a      4    a      1\n",
       "3    a      3    a      1\n",
       "4    a      2    a      1\n",
       "5    b      3    b      5"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key1</th>\n",
       "      <th>data1</th>\n",
       "      <th>key2</th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>b</td>\n",
       "      <td>2</td>\n",
       "      <td>b</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>b</td>\n",
       "      <td>5</td>\n",
       "      <td>b</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>a</td>\n",
       "      <td>4</td>\n",
       "      <td>a</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>a</td>\n",
       "      <td>3</td>\n",
       "      <td>a</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>a</td>\n",
       "      <td>2</td>\n",
       "      <td>a</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>b</td>\n",
       "      <td>3</td>\n",
       "      <td>b</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 8
  },
  {
   "cell_type": "code",
   "source": [
    "# 全外连接\n",
    "pd.merge(df_obj1, df_obj2, left_on='key1', right_on='key2',\n",
    "         how='outer')  #全外连接"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T11:26:36.207869Z",
     "start_time": "2025-01-17T11:26:36.202161Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "  key1  data1 key2  data2\n",
       "0    a    4.0    a    1.0\n",
       "1    a    3.0    a    1.0\n",
       "2    a    2.0    a    1.0\n",
       "3    b    2.0    b    5.0\n",
       "4    b    5.0    b    5.0\n",
       "5    b    3.0    b    5.0\n",
       "6    c    7.0  NaN    NaN\n",
       "7  NaN    NaN    d    6.0"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key1</th>\n",
       "      <th>data1</th>\n",
       "      <th>key2</th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>a</td>\n",
       "      <td>4.0</td>\n",
       "      <td>a</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>a</td>\n",
       "      <td>3.0</td>\n",
       "      <td>a</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>a</td>\n",
       "      <td>2.0</td>\n",
       "      <td>a</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>b</td>\n",
       "      <td>2.0</td>\n",
       "      <td>b</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>b</td>\n",
       "      <td>5.0</td>\n",
       "      <td>b</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>b</td>\n",
       "      <td>3.0</td>\n",
       "      <td>b</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>c</td>\n",
       "      <td>7.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>d</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 9
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "outputs": [
    {
     "data": {
      "text/plain": "  key1  data1 key2  data2\n0    b      6    b    8.0\n1    b      0    b    8.0\n2    a      3    a    5.0\n3    c      9  NaN    NaN\n4    a      3    a    5.0\n5    a      2    a    5.0\n6    b      9    b    8.0",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>key1</th>\n      <th>data1</th>\n      <th>key2</th>\n      <th>data2</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>b</td>\n      <td>6</td>\n      <td>b</td>\n      <td>8.0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>b</td>\n      <td>0</td>\n      <td>b</td>\n      <td>8.0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>a</td>\n      <td>3</td>\n      <td>a</td>\n      <td>5.0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>c</td>\n      <td>9</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>a</td>\n      <td>3</td>\n      <td>a</td>\n      <td>5.0</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>a</td>\n      <td>2</td>\n      <td>a</td>\n      <td>5.0</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>b</td>\n      <td>9</td>\n      <td>b</td>\n      <td>8.0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# left等价于left join，左表有右表没有的用nan；右表有左表没有的不要\n",
    "pd.merge(df_obj1, df_obj2, left_on='key1', right_on='key2', how='left')"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# right等价于数据库的right join\n",
    "pd.merge(df_obj1, df_obj2, left_on='key1', right_on='key2', how='right')"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T11:29:46.262415Z",
     "start_time": "2025-01-17T11:29:46.257165Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "  key1  data1 key2  data2\n",
       "0    a    4.0    a      1\n",
       "1    a    3.0    a      1\n",
       "2    a    2.0    a      1\n",
       "3    b    2.0    b      5\n",
       "4    b    5.0    b      5\n",
       "5    b    3.0    b      5\n",
       "6  NaN    NaN    d      6"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key1</th>\n",
       "      <th>data1</th>\n",
       "      <th>key2</th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>a</td>\n",
       "      <td>4.0</td>\n",
       "      <td>a</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>a</td>\n",
       "      <td>3.0</td>\n",
       "      <td>a</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>a</td>\n",
       "      <td>2.0</td>\n",
       "      <td>a</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>b</td>\n",
       "      <td>2.0</td>\n",
       "      <td>b</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>b</td>\n",
       "      <td>5.0</td>\n",
       "      <td>b</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>b</td>\n",
       "      <td>3.0</td>\n",
       "      <td>b</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>d</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 10
  },
  {
   "cell_type": "code",
   "source": [
    "# 处理重复列名\n",
    "df_obj1 = pd.DataFrame({'key': ['b', 'b', 'a', 'c', 'a', 'a', 'b'],\n",
    "                        'data' : np.random.randint(0,10,7)})\n",
    "df_obj2 = pd.DataFrame({'key': ['a', 'b', 'd'],\n",
    "                        'data' : np.random.randint(0,10,3)})\n",
    "print(df_obj1)\n",
    "print('-'*50)\n",
    "print(df_obj2)\n",
    "#给相同的数据列添加后缀\n",
    "print(pd.merge(df_obj1, df_obj2, on='key', suffixes=('_left', '_right')))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T11:30:30.045725Z",
     "start_time": "2025-01-17T11:30:30.040244Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  key  data\n",
      "0   b     0\n",
      "1   b     7\n",
      "2   a     6\n",
      "3   c     9\n",
      "4   a     0\n",
      "5   a     5\n",
      "6   b     9\n",
      "--------------------------------------------------\n",
      "  key  data\n",
      "0   a     2\n",
      "1   b     9\n",
      "2   d     4\n",
      "  key  data_left  data_right\n",
      "0   b          0           9\n",
      "1   b          7           9\n",
      "2   a          6           2\n",
      "3   a          0           2\n",
      "4   a          5           2\n",
      "5   b          9           9\n"
     ]
    }
   ],
   "execution_count": 12
  },
  {
   "cell_type": "code",
   "source": [
    "# 按索引连接\n",
    "df_obj1 = pd.DataFrame({'key': ['b', 'b', 'a', 'c', 'a', 'a', 'b'],\n",
    "                        'data1' : np.random.randint(0,10,7)})\n",
    "df_obj2 = pd.DataFrame({'data2' : np.random.randint(0,10,3)}, index=['a', 'b', 'd'])\n",
    "print(df_obj1)\n",
    "print('-'*50)\n",
    "print(df_obj2)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T11:31:31.515904Z",
     "start_time": "2025-01-17T11:31:31.511903Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  key  data1\n",
      "0   b      5\n",
      "1   b      8\n",
      "2   a      2\n",
      "3   c      6\n",
      "4   a      7\n",
      "5   a      0\n",
      "6   b      9\n",
      "--------------------------------------------------\n",
      "   data2\n",
      "a      1\n",
      "b      1\n",
      "d      7\n"
     ]
    }
   ],
   "execution_count": 14
  },
  {
   "cell_type": "code",
   "source": [
    "print(pd.merge(df_obj1, df_obj2, left_on='key', right_index=True))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T11:32:07.236492Z",
     "start_time": "2025-01-17T11:32:07.229526Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  key  data1  data2\n",
      "0   b      5      1\n",
      "1   b      8      1\n",
      "2   a      2      1\n",
      "4   a      7      1\n",
      "5   a      0      1\n",
      "6   b      9      1\n"
     ]
    }
   ],
   "execution_count": 15
  },
  {
   "cell_type": "code",
   "source": [
    "pd.merge(df_obj2,df_obj1, left_index=True, right_on='key')"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T11:32:28.615737Z",
     "start_time": "2025-01-17T11:32:28.609799Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "   data2 key  data1\n",
       "2      1   a      2\n",
       "4      1   a      7\n",
       "5      1   a      0\n",
       "0      1   b      5\n",
       "1      1   b      8\n",
       "6      1   b      9"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data2</th>\n",
       "      <th>key</th>\n",
       "      <th>data1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>a</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>a</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>b</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>b</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1</td>\n",
       "      <td>b</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 16
  },
  {
   "cell_type": "markdown",
   "source": [],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
